home *** CD-ROM | disk | FTP | other *** search
- #include "b:stdio.h"
- #include "b:grep.h"
-
- /*
- * This module contains the utilities for use by the GREP
- * routine to match patterns. Routines are in alphabetical
- * order.
- */
-
- int amatch( lin , pat, boln)
- char *lin, *boln ;
- TOKEN *pat ;
- {
-
- /* Scans throught the pattern template looking for a match
- * with lin. Each element of lin is compared with the template
- * until either a mis-match is found or the end of the tempalte
- * is reached. In teh formaer cas a zero is returned; in the
- * latter, a pointer into lin ( pointing to the last character in
- * in the matched pattern) is returned .
- *
- * 'lin' is a pointer to the line being searched
- * 'pat' is a pointer to the template made by matchpat()
- * 'boln' is a pointer into 'lin' which points to the character
- * at the beginning of the line.
- *
- */
-
- register char *bocl, *rval, *strstart ;
-
- if( pat == 0)
- return(0) ;
-
- strstart = lin ;
-
- while( pat ) {
- if( pat->tok == CLOSURE && pat->next ) {
-
- /* Process a closure: First skip over the closure token
- * to the object to be repeated
- */
-
- pat = pat->next ;
-
- /* Now match as many occurances of the closure pattern as
- * possible
- */
-
- bocl = lin ;
- while( *lin && omatch(&lin, pat) )
- ;
-
- /* 'lin' now points to the character that mad us fail. Now
- * go on to process the rest of the string. A problem here is
- * a character following the closure which could have been in
- * the closure. For example, in the pattern '[a-z]*t', the final
- * 't' would have been sucked up while in the loop. So if the
- * match fails, we back up a notch a try to match the rest of
- * the string again, repeating the process recursively until we
- * get to the beginning of the closure. The recursion goes
- * at most two levels deep.
- */
-
- if(pat = pat->next) {
- while( bocl <= lin ) {
- if( rval = amatch(lin, pat, boln) ) {
- return( rval ) ; /* success */
- }
- else
- --lin ;
- }
- return(0) ; /* matched failed */
- }
- }
- else if ( omatch( &lin, pat, boln) ) {
- pat = pat->next ;
- }
- else {
- return( 0 ) ;
- }
- }
-
- /* Note that omatch() advances lin to point at the next character
- * to be matched; consequently, when we reach the end of the template,
- * lin will be pointing at the character following the last character
- * matched. The exceptions are tempaltes containing only a BOl or EOL
- * token. In these cases omatch doesn't advance.
- *
- * So, decrement lin to make it point at the end of the matched string
- * as long as this doesn't take us past the beginning of the string.
- */
-
- return( max(strstart, --lin) ) ;
- }
-
- /*---------------------------------------------------------------*/
- delete( ch, str )
- int ch ;
- register char *str ;
- {
-
- /* delete the first occurance of the character from the string
- * and move everything else over a notch to fill the hole
- */
-
- ch &= 0xff ;
- while( *str && *str != ch)
- str++ ;
- while( *str ) {
- *str = *(str+1) ;
- str++ ;
- }
- }
-
- /*--------------------------------------------------------------*/
- int dodash( delim, src, dest, maxccl )
- int delim, maxccl ;
- char **src, *dest ;
- {
-
- /* Expand the set pointed to by *src into dest. Stop at delim.
- * Return 0 on error or the size of the character class on success.
- * Update *src to point at delim. A set can have on element {x} or
- * several elements ( {abcdefghijklmn} and {a-n} are equivalent).
- * Note: the dash notation is expanded as sequential numbers. This
- * means that the set {a-Z} will contain the extra characters [\]^_ and
- * ` . The maximum number of characters is defined by maxccl
- */
-
- register char *dstart ;
- register int k, at_begin ;
- char *sptr, ctest ;
-
- dstart = dest ;
- sptr = *src ;
- at_begin = 1 ;
-
- while( *sptr && (*sptr != delim ) && (dest-dstart < maxccl) ) {
-
- if( *sptr == ESCAPE ) { /* handle escape \ characters*/
- *dest++ = esc( &sptr ) ;
- sptr++ ;
- }
-
- else if ( *sptr != '-' ) { /* handle normal characters */
- *dest++ = *sptr++ ;
- }
-
- else if ( at_begin || *(sptr+1) == delim ) {
- *dest++ = '-' ; /* literal '-' */
- }
-
- else if ( *(sptr-1) <= *(sptr+1) ) {
- sptr++ ;
- for( k = *(sptr-2) ; ++k <= *sptr ; )
- *dest++ = k ;
- sptr++ ;
- }
-
- else {
- return(0) ;
- }
- at_begin = 0 ;
- }
- *dest++ = '\000' ;
- *src = sptr ;
-
- return( dest - dstart ) ;
- }
-
- /*-----------------------------------------------------------------*/
- int esc(s)
- char **s ;
- {
- register int rval ;
-
- /* Map escape sequences onto their actual ASCII values. If no
- * escape prefix is present, s is untouched and *s is returned,
- * otherwise **s is advanced to point to the escaped character
- * and the translated character is returned
- */
-
- if ( **s != ESCAPE ) {
- rval = **s ;
- }
- else {
- (*s)++ ;
- switch( toupper(**s) ) {
- case '\000' : rval = ESCAPE ; break ;
- case 'S' : rval = ' ' ; break ;
- case 'N' : rval = '\n' ; break ;
- case 'T' : rval = '\t' ; break ;
- case 'B' : rval = '\b' ; break ;
- case 'R' : rval = '\r' ; break ;
- default : rval = **s ; break ;
- }
- }
- return( rval) ;
- }
-
- /*----------------------------------------------------------------*/
- TOKEN *getpat( arg )
- char *arg ;
- {
-
- /* Translate arg into a token string */
-
- return( makepat( arg, '\000' ) ) ;
- }
-
- /*-----------------------------------------------------------------*/
- insert( ch, str )
- int ch ;
- register char *str ;
- {
-
- /* Insert ch into string at the place pointed to by str. Move
- * everything else over a notch
- */
-
- register char *bp ;
-
- bp = str ;
- while( *str ) /* find the end of the string */
- str++ ;
- do { /* move the string over a notch */
- *(str+1) = *str ;
- str-- ;
- } while ( str >= bp ) ;
- *bp = ch ;
- }
-
- /*------------------------------------------------------------------*/
- char *in_string( delim, str )
- register int delim ;
- register char *str ;
- {
-
- /* return a pointer to delim if it is in the string, 0 otherwise */
-
- delim &= 0x7f ;
- while( *str && *str != delim )
- str++ ;
- return( *str ? str : 0 ) ;
- }
-
- /*-------------------------------------------------------------------*/
- int isalphanum(c)
- int c ;
- {
- return( ('a' <= c && c <= 'z') ||
- ('A' <= c && c <= 'Z') ||
- ('0' <= c && c <= '9') ) ;
- }
-
- /*--------------------------------------------------------------------*/
- TOKEN *makepat( arg, delim )
- char *arg ;
- int delim ;
- {
-
- /* Make a pattern template from the string pointed to by arg. Stop
- * when delim or '\000' or '\n' is found is arg. Reurn a pointer to
- * the pattern template.
- *
- * The pattern template used here is somewhat different from those
- * used in the book; each token is a structure of the form TOKEN.
- * A token consists of an identifier, a pointer to a string, a literal
- * character and a pointer to the next token. This last is 0 if there is
- * no additional token.
- *
- */
-
- TOKEN *head, *tail ;
- TOKEN *ntok ;
- char buf[CLS_SIZE] ;
- int error ;
-
- /* Check for characters thatt aren't legal at the begiining of a token */
-
- if ( *arg == '\0' || *arg == delim || *arg == '\n' || *arg == CLOSURE)
- return(0) ;
-
- error = 0 ;
- head = 0 ;
- tail = 0 ;
- while( *arg && *arg != delim && *arg != '\n' && !error ) {
- ntok = malloc ( TOKSIZE ) ;
- ntok->string = &(ntok->lchar) ;
- ntok->lchar = '\000' ;
- ntok->next = 0 ;
-
- switch( *arg ) {
-
- case ANY :
- ntok->tok = ANY ;
- break ;
-
- case BOL :
- if( head == 0 ) /* this is the first symbol */
- ntok->tok = BOL ;
- else
- error = 1 ;
- break ;
-
- case EOL :
- if( *(arg+1) == delim || *(arg+1) == '\000' || *(arg+1) == '\n' )
- ntok->tok = EOL ;
- else
- error = 1 ;
- break ;
-
- case CCL :
- if ( *(arg+1) == NEGATE ) {
- ntok->tok = NCCL ;
- arg += 2 ;
- }
- else {
- ntok->tok = CCL ;
- arg++ ;
- }
- error = dodash( CCLEND, &arg, buf, CLS_SIZE) ;
- if ( error != 0 ) {
- ntok->string = malloc( error ) ;
- strcpy( ntok->string, buf ) ;
- error = 0 ;
- }
- break ;
-
- case CLOSURE:
- if( head != 0 ) {
- switch( tail->tok ) {
-
- case BOL:
- case EOL:
- case CLOSURE:
- return(0) ;
- default:
- ntok->tok = CLOSURE ;
- }
- }
- break ;
-
- default:
- ntok->tok = LITCHAR ;
- ntok->lchar = esc( &arg ) ;
- }
-
-
- if( error || ntok == 0 ) {
- unmakepat( head ) ;
- return( 0 ) ;
- }
- else if ( head == 0 ) {
- ntok->next = 0 ; /* this is the first node in the chain */
- head = tail = ntok ;
- }
- else if ( ntok->tok != CLOSURE ) {
- tail->next = ntok ; /* Insert at the end of the list */
- ntok->next = tail ;
- tail = ntok ;
- }
- else if( head != tail ) {
- (tail->next)->next = ntok ; /* More than one node in the chain. */
- ntok->next = tail ; /* Insert the CLOSURE node immediately */
- /* in front of the tail */
- }
- else {
- ntok->next = head ; /* Only one node in the chain, Insert */
- tail->next = ntok ; /* node at the head of the linked list*/
- head = ntok ;
- }
- arg++ ;
- }
- tail->next = 0 ;
- return( head ) ;
- }
-
- /*-----------------------------------------------------------------------*/
- char *matchs( line, pat, ret_endp )
- char *line ;
- TOKEN *pat ;
- int ret_endp ;
- {
-
- /* Compares line and pattern. Line is a character string while pat
- * is a pattern template made by getpat(). Returns:
- * (1) a 0 if no match is found
- * (2) a pointer to the last character satisfying the match if
- * ret_endp is non-zero
- * (3) A pointer to the beggining of the matched string if ret_endp
- * is non-zero
- */
-
- char *rval, *bptr ;
- bptr = line ;
- while( *line ) {
- if( (rval = amatch(line, pat, bptr)) == 0 ) {
- line++ ;
- }
- else {
- rval = ret_endp ? rval : line ;
- break ;
- }
- }
- return(rval) ;
- }
-
- /*--------------------------------------------------------------------------*/
- stoupper( str )
- char *str ;
- {
-
- /* Map the entire string pointed to by str to upper case */
- char *rval ;
-
- rval = str ;
- while( *str ) {
- if( 'a' <= *str && *str <= 'z' )
- *str -= ('a' - 'A') ;
- str++ ;
- }
- }
-
- /*---------------------------------------------------------------------------*/
- int max( x, y )
- int x, y ;
- {
- return( (x>y) ? x : y ) ;
- }
-
- /*----------------------------------------------------------------------------*/
- int omatch( linp, pat, boln)
- char **linp, *boln ;
- TOKEN *pat ;
- {
-
- /* Match one pattern element, pointed at by pat, with the character
- * at **linp. Return non-zero on match. Otherwise return 0. Linp is
- * advanced to skip over the matched cahracter; it is not advanced on
- * failure. The amount of teh match is 0 for matches to null strings,
- * or 1 otherwise. "boln" should point at the position at the position
- * that will match a BOL token.
- */
-
- register int advance ;
-
- advance = -1 ;
- if( **linp ) {
- switch( pat->tok ) {
- case LITCHAR:
- if( **linp == pat->lchar )
- advance = 1 ;
- break ;
-
- case BOL:
- if( **linp == boln )
- advance = 0 ;
- break ;
-
- case ANY:
- if( **linp != '\n' )
- advance = 1 ;
- break ;
-
- case EOL:
- if( **linp == '\n' )
- advance = 0 ;
- break ;
-
- case CCL:
- if( in_string( **linp, pat->string) )
- advance = 1 ;
- break ;
-
- case NCCL:
- if( ! in_string( **linp, pat->string) )
- advance = 1 ;
- break ;
-
- default:
- printf("omatch: can't happen\n") ;
- }
- }
- if( advance >= 0 )
- *linp += advance ;
- return( ++advance ) ;
- }
-
- /*---------------------------------------------------------------------*/
- pr_line( ln )
- register char *ln ;
- {
- for( ; *ln ; ln++ ) {
- if( (' ' <= *ln) && (*ln <= '~') )
- putchar( *ln ) ;
- else {
- printf("\\0x%02x", *ln) ;
- if( *ln == '\n' )
- putchar( '\n' ) ;
- }
- }
- }
-
- /*--------------------------------------------------------------------*/
- pr_tok( head )
- TOKEN *head ;
- {
- register char *str ;
-
- for ( ; head ; head = head->next ) {
- switch( head->tok ) {
-
- case BOL:
- str = "BOL" ;
- break ;
-
- case EOL:
- str = "EOL" ;
- break ;
-
- case ANY:
- str = "ANY" ;
- break ;
-
- case LITCHAR:
- str = "LITCHAR" ;
- break ;
-
- case ESCAPE:
- str = "ESCAPE" ;
- break ;
-
- case CCL:
- str = "CCL" ;
- break ;
-
- case CCLEND:
- str = "CCLEND" ;
- break ;
-
- case NEGATE:
- str = "NEGATE" ;
- break ;
-
- case NCCL:
- str = "NCCL" ;
- break ;
-
- case CLOSURE:
- str = "CLOSURE" ;
- break ;
-
- default:
- str = "***** unknown *****" ;
- break ;
- }
- printf("%-8s at: 0x%x, ",str, head ) ;
- if ( head->tok == CCL || head->tok == NCCL )
- printf("string = [ %s ] =, ",head->string) ;
- else if (head->tok == LITCHAR )
- printf("lchar = %c, ",head->lchar) ;
- printf("next = 0x%x\n", head->next) ;
- }
- putchar('\n') ;
- }
-
- /*-----------------------------------------------------------------------*/
- unmakepat( head )
- TOKEN *head ;
- {
- register TOKEN *old_head ;
-
- while( head ) {
- switch (head->tok) {
-
- case CCL:
- case NCCL:
- free(head->string) ;
- default:
- old_head = head ;
- head = head->next ;
- free(old_head) ;
- break ;
- }
- }
- }
-
-
-
-
-